; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/bmi-builtins.c

;
; AMD intrinsics
;

; 16-bit trailing-zero count (double-underscore variant of the test).
; llvm.cttz is called with `i1 false` as its second operand, so a zero input
; produces a defined result and no separate compare against zero is required;
; the function therefore consists of the intrinsic call alone.
define i16 @test__tzcnt_u16(i16 %a0) {
; X86-LABEL: test__tzcnt_u16:
; X86:       # %bb.0:
; X86-NEXT:    tzcntw {{[0-9]+}}(%esp), %ax
; X86-NEXT:    retl
;
; X64-LABEL: test__tzcnt_u16:
; X64:       # %bb.0:
; X64-NEXT:    tzcntw %di, %ax
; X64-NEXT:    retq
  %cttz = call i16 @llvm.cttz.i16(i16 %a0, i1 false)
  ret i16 %cttz
}

; And-not pattern: (~%a0) & %a1, with the complement written as xor with -1.
; The expected output for both triples is a plain xorl/andl pair.
define i32 @test__andn_u32(i32 %a0, i32 %a1) {
; X86-LABEL: test__andn_u32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorl $-1, %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test__andn_u32:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    xorl $-1, %eax
; X64-NEXT:    andl %esi, %eax
; X64-NEXT:    retq
  ; Bitwise complement of the first argument.
  %not = xor i32 %a0, -1
  ; Mask the second argument with the complemented first argument.
  %andn = and i32 %not, %a1
  ret i32 %andn
}

; Bit-field extract through the x86 BMI intrinsic: %a0 is the source value and
; %a1 is forwarded unchanged as the second (control) operand.
define i32 @test__bextr_u32(i32 %a0, i32 %a1) {
; X86-LABEL: test__bextr_u32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test__bextr_u32:
; X64:       # %bb.0:
; X64-NEXT:    bextrl %esi, %edi, %eax
; X64-NEXT:    retq
  %field = call i32 @llvm.x86.bmi.bextr.32(i32 %a0, i32 %a1)
  ret i32 %field
}

; Isolate-lowest-set-bit pattern: %a0 & (0 - %a0).
; The expected output is an explicit xorl/subl/andl sequence on both triples.
define i32 @test__blsi_u32(i32 %a0) {
; X86-LABEL: test__blsi_u32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    subl %ecx, %eax
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test__blsi_u32:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    subl %edi, %eax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    retq
  ; Two's-complement negation of the argument.
  %negated = sub i32 0, %a0
  ; Only the lowest set bit survives the and with the negation.
  %lowbit = and i32 %a0, %negated
  ret i32 %lowbit
}

; Mask-up-to-lowest-set-bit pattern: %a0 ^ (%a0 - 1).
; The expected output forms the decrement with leal and then applies xorl.
define i32 @test__blsmsk_u32(i32 %a0) {
; X86-LABEL: test__blsmsk_u32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    leal -1(%ecx), %eax
; X86-NEXT:    xorl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test__blsmsk_u32:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal -1(%rdi), %eax
; X64-NEXT:    xorl %edi, %eax
; X64-NEXT:    retq
  ; Argument minus one.
  %minus1 = sub i32 %a0, 1
  ; Xor of the argument with its decrement.
  %mask = xor i32 %a0, %minus1
  ret i32 %mask
}

; Reset-lowest-set-bit pattern: %a0 & (%a0 - 1).
; The expected output forms the decrement with leal and then applies andl.
define i32 @test__blsr_u32(i32 %a0) {
; X86-LABEL: test__blsr_u32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    leal -1(%ecx), %eax
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test__blsr_u32:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal -1(%rdi), %eax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    retq
  ; Argument minus one.
  %minus1 = sub i32 %a0, 1
  ; And of the argument with its decrement clears the lowest set bit.
  %cleared = and i32 %a0, %minus1
  ret i32 %cleared
}

; 32-bit trailing-zero count (double-underscore variant of the test).
; llvm.cttz is called with `i1 false` as its second operand, so a zero input
; produces a defined result and no separate compare against zero is required;
; the function therefore consists of the intrinsic call alone.
define i32 @test__tzcnt_u32(i32 %a0) {
; X86-LABEL: test__tzcnt_u32:
; X86:       # %bb.0:
; X86-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test__tzcnt_u32:
; X64:       # %bb.0:
; X64-NEXT:    tzcntl %edi, %eax
; X64-NEXT:    retq
  %cttz = call i32 @llvm.cttz.i32(i32 %a0, i1 false)
  ret i32 %cttz
}

;
; Intel intrinsics
;

; 16-bit trailing-zero count (single-underscore variant of the test).
; llvm.cttz is called with `i1 false` as its second operand, so a zero input
; produces a defined result and no separate compare against zero is required;
; the function therefore consists of the intrinsic call alone.
define i16 @test_tzcnt_u16(i16 %a0) {
; X86-LABEL: test_tzcnt_u16:
; X86:       # %bb.0:
; X86-NEXT:    tzcntw {{[0-9]+}}(%esp), %ax
; X86-NEXT:    retl
;
; X64-LABEL: test_tzcnt_u16:
; X64:       # %bb.0:
; X64-NEXT:    tzcntw %di, %ax
; X64-NEXT:    retq
  %cttz = call i16 @llvm.cttz.i16(i16 %a0, i1 false)
  ret i16 %cttz
}

; And-not pattern: (~%a0) & %a1, with the complement written as xor with -1.
; The expected output for both triples is a plain xorl/andl pair.
define i32 @test_andn_u32(i32 %a0, i32 %a1) {
; X86-LABEL: test_andn_u32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorl $-1, %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_andn_u32:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    xorl $-1, %eax
; X64-NEXT:    andl %esi, %eax
; X64-NEXT:    retq
  ; Bitwise complement of the first argument.
  %not = xor i32 %a0, -1
  ; Mask the second argument with the complemented first argument.
  %andn = and i32 %not, %a1
  ret i32 %andn
}

; Three-operand bit-field extract: the control word handed to the BMI
; intrinsic is assembled from the low 8 bits of %a1 (placed in bits 7:0) and
; the low 8 bits of %a2 (placed in bits 15:8); %a0 is the source value.
define i32 @test_bextr_u32(i32 %a0, i32 %a1, i32 %a2) {
; X86-LABEL: test_bextr_u32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl $255, %ecx
; X86-NEXT:    andl $255, %eax
; X86-NEXT:    shll $8, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bextr_u32:
; X64:       # %bb.0:
; X64-NEXT:    andl $255, %esi
; X64-NEXT:    andl $255, %edx
; X64-NEXT:    shll $8, %edx
; X64-NEXT:    orl %esi, %edx
; X64-NEXT:    bextrl %edx, %edi, %eax
; X64-NEXT:    retq
  ; Low byte of the second argument -> control bits 7:0.
  %start = and i32 %a1, 255
  ; Low byte of the third argument, moved up to control bits 15:8.
  %len = and i32 %a2, 255
  %len.shifted = shl i32 %len, 8
  ; Combine both byte fields into the control operand.
  %control = or i32 %start, %len.shifted
  %field = call i32 @llvm.x86.bmi.bextr.32(i32 %a0, i32 %control)
  ret i32 %field
}

; Isolate-lowest-set-bit pattern: %a0 & (0 - %a0).
; The expected output is an explicit xorl/subl/andl sequence on both triples.
define i32 @test_blsi_u32(i32 %a0) {
; X86-LABEL: test_blsi_u32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    subl %ecx, %eax
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_blsi_u32:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    subl %edi, %eax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    retq
  ; Two's-complement negation of the argument.
  %negated = sub i32 0, %a0
  ; Only the lowest set bit survives the and with the negation.
  %lowbit = and i32 %a0, %negated
  ret i32 %lowbit
}

; Mask-up-to-lowest-set-bit pattern: %a0 ^ (%a0 - 1).
; The expected output forms the decrement with leal and then applies xorl.
define i32 @test_blsmsk_u32(i32 %a0) {
; X86-LABEL: test_blsmsk_u32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    leal -1(%ecx), %eax
; X86-NEXT:    xorl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_blsmsk_u32:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal -1(%rdi), %eax
; X64-NEXT:    xorl %edi, %eax
; X64-NEXT:    retq
  ; Argument minus one.
  %minus1 = sub i32 %a0, 1
  ; Xor of the argument with its decrement.
  %mask = xor i32 %a0, %minus1
  ret i32 %mask
}

; Reset-lowest-set-bit pattern: %a0 & (%a0 - 1).
; The expected output forms the decrement with leal and then applies andl.
define i32 @test_blsr_u32(i32 %a0) {
; X86-LABEL: test_blsr_u32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    leal -1(%ecx), %eax
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_blsr_u32:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal -1(%rdi), %eax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    retq
  ; Argument minus one.
  %minus1 = sub i32 %a0, 1
  ; And of the argument with its decrement clears the lowest set bit.
  %cleared = and i32 %a0, %minus1
  ret i32 %cleared
}

; 32-bit trailing-zero count (single-underscore variant of the test).
; llvm.cttz is called with `i1 false` as its second operand, so a zero input
; produces a defined result and no separate compare against zero is required;
; the function therefore consists of the intrinsic call alone.
define i32 @test_tzcnt_u32(i32 %a0) {
; X86-LABEL: test_tzcnt_u32:
; X86:       # %bb.0:
; X86-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_tzcnt_u32:
; X64:       # %bb.0:
; X64-NEXT:    tzcntl %edi, %eax
; X64-NEXT:    retq
  %cttz = call i32 @llvm.cttz.i32(i32 %a0, i1 false)
  ret i32 %cttz
}

; Declarations of the intrinsics called by the test functions in this file:
; the 16- and 32-bit count-trailing-zeros intrinsics (value, i1 flag) and the
; 32-bit x86 BMI bit-field-extract intrinsic (source, control).
declare i16 @llvm.cttz.i16(i16, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i32 @llvm.x86.bmi.bextr.32(i32, i32)
